#include "BaseMemoryLib.h"//For CopyMem
#include "SHA256.h"
#include "HelperAsm.h"
//#include "Debug.h"
/* Message schedule scratch buffer w[]. Sha256Block() fills entries 0..63 and
   SetTEMP1() reads them; no code in this file touches entries 64..79. The
   80-entry size is carried over from the assembly original, whose declaration
   was "dd 80 dup (0)". */
UINT32 VarW0_63[80];
/* Round constants k[0..63]; SetTEMP1() adds ConstK0_63[i] into t1 for round i.
   The numeric values come from the CONST_Kxx macros (defined outside this file). */
const UINT32 ConstK0_63[64] = {
	CONST_K00, CONST_K01, CONST_K02, CONST_K03,
	CONST_K04, CONST_K05, CONST_K06, CONST_K07,
	CONST_K08, CONST_K09, CONST_K10, CONST_K11,
	CONST_K12, CONST_K13, CONST_K14, CONST_K15,
	CONST_K16, CONST_K17, CONST_K18, CONST_K19,
	CONST_K20, CONST_K21, CONST_K22, CONST_K23,
	CONST_K24, CONST_K25, CONST_K26, CONST_K27,
	CONST_K28, CONST_K29, CONST_K30, CONST_K31,
	CONST_K32, CONST_K33, CONST_K34, CONST_K35,
	CONST_K36, CONST_K37, CONST_K38, CONST_K39,
	CONST_K40, CONST_K41, CONST_K42, CONST_K43,
	CONST_K44, CONST_K45, CONST_K46, CONST_K47,
	CONST_K48, CONST_K49, CONST_K50, CONST_K51,
	CONST_K52, CONST_K53, CONST_K54, CONST_K55,
	CONST_K56, CONST_K57, CONST_K58, CONST_K59,
	CONST_K60, CONST_K61, CONST_K62, CONST_K63
};

/* Global SHA-256 working state shared by every routine in this file: the chaining
   values VarH00..VarH07 and the per-round variables VarA..VarH. Hash256Final()
   reads the digest from the first eight 32-bit words of this object. */
SHA256_VAR gSHA256Var;
/*Description: One-shot SHA-256 of a complete message.
  pMsg     - message bytes
  MsgLen   - message length in bytes
  pHashOut - receives the digest written by Hash256Final()
  The global state gSHA256Var is re-initialised on every call.*/
void Hash256Target(UINT8 *pMsg, UINT32 MsgLen, UINT8 *pHashOut)
{
	UINT32 HashedSoFar = 0;                      /*bytes consumed so far, maintained by Update/Final*/
	UINT32 TailBytes = MsgLen & 0x3F;            /*trailing bytes that do not fill a 64-byte block*/
	UINT32 WholeBlockBytes = MsgLen - TailBytes; /*leading part, a multiple of 64 bytes*/

	/*Load the initial chaining values H0..H7*/
	gSHA256Var.VarH00 = CONST_H00;
	gSHA256Var.VarH01 = CONST_H01;
	gSHA256Var.VarH02 = CONST_H02;
	gSHA256Var.VarH03 = CONST_H03;
	gSHA256Var.VarH04 = CONST_H04;
	gSHA256Var.VarH05 = CONST_H05;
	gSHA256Var.VarH06 = CONST_H06;
	gSHA256Var.VarH07 = CONST_H07;

	/*Whole blocks first, then the tail together with the padding*/
	Hash256Update(pMsg, WholeBlockBytes, &HashedSoFar);
	Hash256Final(pMsg + WholeBlockBytes, TailBytes, &HashedSoFar, pHashOut);
}

/*Description: Feed whole 64-byte (512-bit) blocks of the message to Sha256Block().
  pMsg            - first block
  MsgLen          - number of bytes to hash; must be a multiple of 64
  pAccumulatedLen - running byte count, increased by MsgLen when blocks were hashed
  The call is a silent no-op when MsgLen is zero or is not a multiple of 64.*/
void Hash256Update(UINT8 *pMsg, UINT32 MsgLen, UINT32 *pAccumulatedLen)
{
	UINT8 *pBlock;
	UINT32 u32BlocksLeft;

	if((MsgLen & 0x3F) != 0){
		/*partial blocks are not accepted here*/
		return;
	}
	if(MsgLen == 0){
		/*nothing to hash*/
		return;
	}

	pBlock = pMsg;
	for(u32BlocksLeft = MsgLen >> 6; u32BlocksLeft != 0; u32BlocksLeft--){
		Sha256Block(pBlock);
		pBlock += 64;
	}

	*pAccumulatedLen += MsgLen;
}

/*Description: Hash the trailing bytes of the message (at most 64), append the SHA-256
  padding and write out the 32-byte digest.
  pMsg            - trailing message bytes (may be unaligned; not read when MsgLen is 0)
  MsgLen          - number of trailing bytes, 0..64; larger values make the call a no-op
  pAccumulatedLen - bytes already hashed through Hash256Update(); increased by MsgLen
  pHashOut        - receives 32 digest bytes (may be unaligned)
  Side effect: the first eight 32-bit words of gSHA256Var are byte-swapped in place,
  so the state must be re-initialised before it is used for another message.*/
void Hash256Final(UINT8 *pMsg, UINT32 MsgLen, UINT32 *pAccumulatedLen, UINT8 *pHashOut)
{
	UINT32 u32Indx = 0;
	UINT32 u32TotalLen = 0;
	UINT32 u32BitLenHi = 0;
	UINT32 u32BitLenLo = 0;
	UINT32 u32LenBlock = 0;
	UINT32 *pu32State = NULL;
	UINT8 *pu8Pad = NULL;

	/*Two consecutive 64-byte blocks. Declared as UINT32 so the buffer is word aligned.
	Keeping both blocks in one array lets the 0x80 marker fall into the second block
	when MsgLen is exactly 64, instead of being written past the end of the first.*/
	UINT32 PadBuf[32];

	if(MsgLen > 64){
		return;
	}

	ZeroMem(PadBuf, 32*4);
	pu8Pad = (UINT8 *)PadBuf;

	/*1: Copy the message tail to the local buffer*/
	if(MsgLen != 0){
		CopyMem(pu8Pad, pMsg, MsgLen);
	}

	/*2: Append bit '1' directly after the message; index is at most 64, inside PadBuf*/
	pu8Pad[MsgLen] = 0x80;

	/*3: Total message length in bits as a 64-bit big-endian integer.
	The byte count is 32 bits wide, so the bit count needs up to 35 bits:
	the upper 3 bits of the byte count go to the high word.*/
	u32TotalLen = MsgLen + *pAccumulatedLen;
	u32BitLenHi = u32TotalLen >> 29;
	u32BitLenLo = u32TotalLen << 3;
	ByteSwapDWord(&u32BitLenHi); //To Big Endian
	ByteSwapDWord(&u32BitLenLo); //To Big Endian

	/*4: The marker byte plus the 8 length bytes fit in the first block only when
	MsgLen <= 55; otherwise the length goes to the end of the second block*/
	u32LenBlock = (MsgLen <= (64 - 9)) ? 0 : 1;
	PadBuf[(u32LenBlock * 16) + 14] = u32BitLenHi;
	PadBuf[(u32LenBlock * 16) + 15] = u32BitLenLo;

	Sha256Block((UINT8 *)&PadBuf[0]);
	if(u32LenBlock != 0){
		Sha256Block((UINT8 *)&PadBuf[16]);
	}

	/*5: Emit the digest. Relies on the first eight 32-bit words of gSHA256Var being
	the chaining values. Each word is byte-swapped so that its bytes land in
	pHashOut most-significant first; CopyMem keeps the store safe for an
	unaligned pHashOut.*/
	pu32State = (UINT32 *)&gSHA256Var;
	for(u32Indx = 0; u32Indx < 8; u32Indx++){
		ByteSwapDWord(pu32State);
		CopyMem(pHashOut + (u32Indx * 4), pu32State, 4);
		pu32State++;
	}

	*pAccumulatedLen = *pAccumulatedLen + MsgLen;
}


/*
Description: Hashes 64 bytes (512 bits) of message using SHA-256 procedure
  pMsg - the 64-byte block; it is read byte by byte, so it needs no particular alignment
  Uses the global schedule buffer VarW0_63[0..63] as scratch and folds the block into
  the chaining values gSHA256Var.VarH00..VarH07.
*/
void Sha256Block(UINT8 *pMsg)
{
	UINT32 u32Indx;
	UINT32 u32Temp;
	UINT32 u32Temp1;
	UINT32 u32Temp2;

	/*break the chunk into sixteen 32-bit big-endian words w[0..15].
	The words are assembled from individual bytes: this avoids a 32-bit load through
	a cast byte pointer (misaligned for arbitrary pMsg) and gives the same result
	on any host byte order.*/
	for(u32Indx = 0; u32Indx < 16; u32Indx++){
		u32Temp = (UINT32)pMsg[(u32Indx * 4)] << 24;
		u32Temp |= (UINT32)pMsg[(u32Indx * 4) + 1] << 16;
		u32Temp |= (UINT32)pMsg[(u32Indx * 4) + 2] << 8;
		u32Temp |= (UINT32)pMsg[(u32Indx * 4) + 3];
		VarW0_63[u32Indx] = u32Temp;
	}

	/*Extend the sixteen 32-bit words into sixty-four (including previous 16) 32-bit words
	for i from 16 to 63
	s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
	s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
	w[i] := w[i-16] + s0 + w[i-7] + s1
	*/
	for(u32Indx = 16; u32Indx < 64; u32Indx++){
		u32Temp = F_TETA1(VarW0_63[u32Indx - 2]);             //s1
		u32Temp = u32Temp + VarW0_63[u32Indx - 7];            //s1 + w[i-7]
		u32Temp = u32Temp + F_TETA0(VarW0_63[u32Indx - 15]);  //s1 + w[i-7] + s0
		u32Temp = u32Temp + VarW0_63[u32Indx - 16];           //s1 + w[i-7] + s0 + w[i-16]
		VarW0_63[u32Indx] = u32Temp;
	}

	//Initialize working variables a..h from the current chaining values
	gSHA256Var.VarA = gSHA256Var.VarH00;
	gSHA256Var.VarB = gSHA256Var.VarH01;
	gSHA256Var.VarC = gSHA256Var.VarH02;
	gSHA256Var.VarD = gSHA256Var.VarH03;
	gSHA256Var.VarE = gSHA256Var.VarH04;
	gSHA256Var.VarF = gSHA256Var.VarH05;
	gSHA256Var.VarG = gSHA256Var.VarH06;
	gSHA256Var.VarH = gSHA256Var.VarH07;

	//Main loop
	for(u32Indx = 0; u32Indx < 64; u32Indx++){
		//Both helpers read a..h from gSHA256Var, so they must run before the rotation below
		u32Temp1 = SetTEMP1(u32Indx);
		u32Temp2 = SetTEMP2();

		gSHA256Var.VarH = gSHA256Var.VarG;
		gSHA256Var.VarG = gSHA256Var.VarF;
		gSHA256Var.VarF = gSHA256Var.VarE;
		gSHA256Var.VarE = gSHA256Var.VarD + u32Temp1; //e = d + t1, taken before d is overwritten
		gSHA256Var.VarD = gSHA256Var.VarC;
		gSHA256Var.VarC = gSHA256Var.VarB;
		gSHA256Var.VarB = gSHA256Var.VarA;
		gSHA256Var.VarA = u32Temp1 + u32Temp2;         //a = t1 + t2
	}

	//Add this chunk's hash to result so far
	gSHA256Var.VarH00 += gSHA256Var.VarA;
	gSHA256Var.VarH01 += gSHA256Var.VarB;
	gSHA256Var.VarH02 += gSHA256Var.VarC;
	gSHA256Var.VarH03 += gSHA256Var.VarD;
	gSHA256Var.VarH04 += gSHA256Var.VarE;
	gSHA256Var.VarH05 += gSHA256Var.VarF;
	gSHA256Var.VarH06 += gSHA256Var.VarG;
	gSHA256Var.VarH07 += gSHA256Var.VarH;
}
/*Description: Compute t1 for round u32Indx from the current working variables:
  t1 = h + s1 + ch + w[i] + k[i]
  where s1 = (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
  and   ch = (e and f) xor ((not e) and g)
  Reads gSHA256Var, VarW0_63 and ConstK0_63; modifies nothing.*/
UINT32 SetTEMP1(UINT32 u32Indx)
{
	UINT32 u32T1;

	u32T1 = gSHA256Var.VarH + F_SIGMA1(gSHA256Var.VarE);                       //h + s1
	u32T1 = u32T1 + F_CH(gSHA256Var.VarE, gSHA256Var.VarF, gSHA256Var.VarG);   //+ ch
	u32T1 = u32T1 + VarW0_63[u32Indx] + ConstK0_63[u32Indx];                   //+ w[i] + k[i]

	return u32T1;
}
/*Description: Compute t2 for the current round from the working variables:
  t2 = s0 + maj
  where s0  = (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
  and   maj = (a and b) xor (a and c) xor (b and c)
  Reads gSHA256Var; modifies nothing.*/
UINT32 SetTEMP2()
{
	UINT32 u32Sigma = F_SIGMA0(gSHA256Var.VarA); //s0

	return u32Sigma + F_MAJ(gSHA256Var.VarA, gSHA256Var.VarB, gSHA256Var.VarC); //s0 + maj
}
